From 96b740e209d0bea4c16d93211ceb139fc98d10c2 Mon Sep 17 00:00:00 2001
From: Tim Deegan
Date: Thu, 28 Jul 2011 13:45:09 +0100
Subject: [PATCH] x86/mm: Handle 1GiB superpages in the pagetable walker.

This allows HAP guests to use 1GiB superpages.  Shadow and PV guests
still can't use them without more support in shadow/* and mm.c.

Signed-off-by: Christoph Egger
Signed-off-by: Tim Deegan
---
 xen/arch/x86/hvm/hvm.c         | 10 +++--
 xen/arch/x86/mm/guest_walk.c   | 68 ++++++++++++++++++++++++++--------
 xen/include/asm-x86/guest_pt.h | 11 ++++++
 xen/include/asm-x86/hvm/hvm.h  |  5 +++
 4 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 34af9d13df..43994d1465 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2385,6 +2385,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                unsigned int *ecx, unsigned int *edx)
 {
     struct vcpu *v = current;
+    struct domain *d = v->domain;
     unsigned int count = *ecx;
 
     if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
@@ -2393,7 +2394,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
     if ( cpuid_hypervisor_leaves(input, count, eax, ebx, ecx, edx) )
         return;
 
-    domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
+    domain_cpuid(d, input, *ecx, eax, ebx, ecx, edx);
 
     switch ( input )
     {
@@ -2429,7 +2430,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
         {
             if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
                 continue;
-            domain_cpuid(v->domain, input, sub_leaf, &_eax, &_ebx, &_ecx,
+            domain_cpuid(d, input, sub_leaf, &_eax, &_ebx, &_ecx,
                          &_edx);
             if ( (_eax + _ebx) > *ebx )
                 *ebx = _eax + _ebx;
@@ -2440,9 +2441,12 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
     case 0x80000001:
         /* We expose RDTSCP feature to guest only when
            tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
-        if ( v->domain->arch.tsc_mode != TSC_MODE_DEFAULT ||
+        if ( d->arch.tsc_mode != TSC_MODE_DEFAULT ||
             !host_tsc_is_safe() )
             *edx &= ~cpufeat_mask(X86_FEATURE_RDTSCP);
+        /* Hide 1GB-superpage feature if we can't emulate it. */
+        if ( !hvm_pse1gb_supported(d) )
+            *edx &= ~cpufeat_mask(X86_FEATURE_PAGE1GB);
         break;
     }
 }
diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
index 9d02b25705..2f9327714c 100644
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -134,7 +134,8 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
     guest_l4e_t *l4p;
 #endif
     uint32_t gflags, mflags, iflags, rc = 0;
-    int pse, smep;
+    int smep;
+    bool_t pse1G = 0, pse2M = 0;
 
     perfc_incr(guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -181,6 +182,37 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
+
+    pse1G = (gflags & _PAGE_PSE) && guest_supports_1G_superpages(v);
+
+    if ( pse1G )
+    {
+        /* Generate a fake l1 table entry so callers don't all
+         * have to understand superpages. */
+        gfn_t start = guest_l3e_get_gfn(gw->l3e);
+        /* Grant full access in the l1e, since all the guest entry's
+         * access controls are enforced in the l3e. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* Import cache-control bits. Note that _PAGE_PAT is actually
+         * _PAGE_PSE, and it is always set. We will clear it in case
+         * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+        flags |= (guest_l3e_get_flags(gw->l3e)
+                  & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+        if ( !(gfn_x(start) & 1) )
+            /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+            flags &= ~_PAGE_PAT;
+
+        if ( gfn_x(start) & GUEST_L3_GFN_MASK & ~0x1 )
+            rc |= _PAGE_INVALID_BITS;
+
+        /* Increment the pfn by the right number of 4k pages. */
+        start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) +
+                     ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK));
+        gw->l1e = guest_l1e_from_gfn(start, flags);
+        gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN);
+        goto set_ad;
+    }
 
 #else /* PAE only... */
 
@@ -219,10 +251,9 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
     if ( rc & _PAGE_PRESENT )
         goto out;
 
-    pse = (guest_supports_superpages(v) &&
-           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
+    pse2M = (gflags & _PAGE_PSE) && guest_supports_superpages(v);
 
-    if ( pse )
+    if ( pse2M )
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
@@ -242,9 +273,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
             /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
             flags &= ~_PAGE_PAT;
 
-#define GUEST_L2_GFN_ALIGN (1 << (GUEST_L2_PAGETABLE_SHIFT - \
-                                  GUEST_L1_PAGETABLE_SHIFT))
-        if ( gfn_x(start) & (GUEST_L2_GFN_ALIGN - 1) & ~0x1 )
+        if ( gfn_x(start) & GUEST_L2_GFN_MASK & ~0x1 )
         {
 #if GUEST_PAGING_LEVELS == 2
             /*
@@ -262,7 +291,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
 
         /* Increment the pfn by the right number of 4k pages.
          * Mask out PAT and invalid bits. */
-        start = _gfn((gfn_x(start) & ~(GUEST_L2_GFN_ALIGN - 1)) +
+        start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) +
                      guest_l1_table_offset(va));
         gw->l1e = guest_l1e_from_gfn(start, flags);
         gw->l1mfn = _mfn(INVALID_MFN);
@@ -282,6 +311,9 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
         rc |= ((gflags & mflags) ^ mflags);
     }
 
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+set_ad:
+#endif
     /* Now re-invert the user-mode requirement for SMEP. */
     if ( smep )
         rc ^= _PAGE_USER;
@@ -295,17 +327,21 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
             paging_mark_dirty(d, mfn_x(gw->l4mfn));
-        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e,
+                         (pse1G && (pfec & PFEC_write_access))) )
             paging_mark_dirty(d, mfn_x(gw->l3mfn));
 #endif
-        if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
-                         (pse && (pfec & PFEC_write_access))) )
-            paging_mark_dirty(d, mfn_x(gw->l2mfn));
-        if ( !pse )
-        {
-            if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
-                             (pfec & PFEC_write_access)) )
-                paging_mark_dirty(d, mfn_x(gw->l1mfn));
+        if ( !pse1G )
+        {
+            if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+                             (pse2M && (pfec & PFEC_write_access))) )
+                paging_mark_dirty(d, mfn_x(gw->l2mfn));
+            if ( !pse2M )
+            {
+                if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
+                                 (pfec & PFEC_write_access)) )
+                    paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            }
         }
     }
 
diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h
index e1e3a450fb..e3dd07a8d7 100644
--- a/xen/include/asm-x86/guest_pt.h
+++ b/xen/include/asm-x86/guest_pt.h
@@ -177,6 +177,11 @@ static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
 
 #endif /* GUEST_PAGING_LEVELS != 2 */
 
+/* Mask of the GFNs covered by an L2 or L3 superpage */
+#define GUEST_L2_GFN_MASK (GUEST_L1_PAGETABLE_ENTRIES - 1)
+#define GUEST_L3_GFN_MASK \
+    ((GUEST_L2_PAGETABLE_ENTRIES * GUEST_L1_PAGETABLE_ENTRIES) - 1)
+
 
 /* Which pagetable features are supported on this vcpu? */
 
@@ -193,6 +198,12 @@ guest_supports_superpages(struct vcpu *v)
             || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
 }
 
+static inline int
+guest_supports_1G_superpages(struct vcpu *v)
+{
+    return (GUEST_PAGING_LEVELS >= 4 && hvm_pse1gb_supported(v->domain));
+}
+
 static inline int
 guest_supports_nx(struct vcpu *v)
 {
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 47f9bc9ab8..07458afd32 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -219,11 +219,16 @@ int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
 #define hvm_nx_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
 
+/* Can we use superpages in the HAP p2m table? */
 #define hvm_hap_has_1gb(d) \
     (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_1GB)
 #define hvm_hap_has_2mb(d) \
     (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_2MB)
 
+/* Can the guest use 1GB superpages in its own pagetables? */
+#define hvm_pse1gb_supported(d) \
+    (cpu_has_page1gb && paging_mode_hap(d))
+
 #ifdef __x86_64__
 #define hvm_long_mode_enabled(v) \
     ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA)
-- 
2.30.2
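
Note: as an aid to review, below is a minimal standalone model of the
fake-l1e construction that the new pse1G path in guest_walk.c performs.
This is a sketch, not Xen source: PAGE_SHIFT, L3_GFN_MASK and PAGE_PAT
are simplified stand-ins for the hypervisor's own definitions (assuming
4KiB pages and 512-entry tables, so GUEST_L3_GFN_MASK == 512*512 - 1),
and the values in main() are arbitrary test inputs.

/* Standalone model of the fake-l1e arithmetic for 1GiB superpages
 * (illustration only, not Xen code). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   12
#define L3_GFN_MASK  ((512ull * 512ull) - 1)   /* cf. GUEST_L3_GFN_MASK */
#define PAGE_PAT     0x080   /* bit 7: PAT in an l1e, but PSE in an l3e */

/* gfn of the 4KiB frame the fake l1e should name, given the gfn stored
 * in the l3e and the faulting guest VA: keep the 1GiB-aligned part of
 * the l3e's gfn and add the page offset taken from the VA. */
static uint64_t fake_l1e_gfn(uint64_t l3e_gfn, uint64_t va)
{
    return (l3e_gfn & ~L3_GFN_MASK) + ((va >> PAGE_SHIFT) & L3_GFN_MASK);
}

/* In an L3 superpage entry the real PAT bit lives at bit 12, i.e. bit 0
 * of the gfn field, while bit 7 is _PAGE_PSE and always set.  Mirror
 * the patch: import bit 7 as PAT, then clear it when gfn bit 0 is 0. */
static unsigned int fake_l1e_pat(uint64_t l3e_gfn, unsigned int flags)
{
    flags |= PAGE_PAT;
    if ( !(l3e_gfn & 1) )
        flags &= ~PAGE_PAT;
    return flags;
}

int main(void)
{
    uint64_t base = 0x40000;   /* a 1GiB-aligned gfn: 512 * 512 */

    /* VA offset 0 maps the superpage's first frame; an offset of five
     * pages maps frame base + 5. */
    assert(fake_l1e_gfn(base, 0) == base);
    assert(fake_l1e_gfn(base, 5ull << PAGE_SHIFT) == base + 5);

    /* The synthesized l1e's bit-7 PAT follows the l3e's gfn bit 0. */
    assert(!(fake_l1e_pat(base, 0) & PAGE_PAT));
    assert(fake_l1e_pat(base | 1, 0) & PAGE_PAT);

    printf("fake-l1e model: all checks passed\n");
    return 0;
}

Built with any C99 compiler (e.g. gcc -o model model.c), the asserts
encode the two properties callers rely on: the synthesized gfn tracks
the VA within the 1GiB frame, and the l1e's PAT bit mirrors the l3e's
bit-12 PAT, so callers never need to know a superpage was involved.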